// Copyright (C) 2020-2025 Jonathan Müller and lexy contributors
// SPDX-License-Identifier: BSL-1.0

#define ANKERL_NANOBENCH_IMPLEMENT
#include <nanobench.h>

#include <lexy/input/file.hpp>

#include <fstream>
#include <stdexcept>
#include <string>
#include <string_view>

bool json_baseline(const lexy::buffer<lexy::utf8_encoding>& input);
bool json_lexy(const lexy::buffer<lexy::utf8_encoding>& input);
bool json_lexy_no_swar(const lexy::buffer<lexy::utf8_encoding>& input);
bool json_lexy_no_buffer(const lexy::buffer<lexy::utf8_encoding>& input);
bool json_pegtl(const lexy::buffer<lexy::utf8_encoding>& input);
bool json_nlohmann(const lexy::buffer<lexy::utf8_encoding>& input);
bool json_rapid(const lexy::buffer<lexy::utf8_encoding>& input);
bool json_boost(const lexy::buffer<lexy::utf8_encoding>& input);
bool json_glaze(const lexy::buffer<lexy::utf8_encoding>& input);

// Reads the given benchmark data file (relative to LEXY_BENCHMARK_DATA) into
// a UTF-8 encoded lexy buffer.
// Throws std::runtime_error when the file cannot be read.
auto get_data(const char* file_name)
{
    auto full_path = std::string(LEXY_BENCHMARK_DATA) + "/" + file_name;

    auto file = lexy::read_file<lexy::utf8_encoding>(full_path.c_str());
    if (!file)
        throw std::runtime_error("unable to read data file");

    return LEXY_MOV(file).buffer();
}

// The static AsciiDoc front matter and introduction, emitted once at the top
// of the generated report.
const char* output_prefix()
{
    static constexpr const char* prefix = R"adoc(---
title: JSON Validation Benchmark
---

// This file is automatically generated by `lexy_benchmark_json`.
// DO NOT MODIFY.

This benchmark measures the time it takes to *validate* JSON, i.e. to check whether it is well-formed.
Validation was chosen as opposed to parsing, as parsing speed depends on the JSON data structure as well.
Implementing an efficient JSON container is out of scope for lexy, so it would have a disadvantage over the specialized JSON libraries.

The average validation times for each input are shown in the boxplots below.
Lower values are better.

[pass]
++++
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
++++

)adoc";
    return prefix;
}

// The mustache template that nanobench renders once per benchmarked input
// file; it emits an AsciiDoc passthrough containing a Plotly boxplot of the
// measured validation times.
const char* output_template()
{
    static constexpr const char* boxplot_template = R"adoc(
[pass]
++++
<div id="{{title}}"></div>
<script>
    var data = [
        {{#result}}{
            name: '{{name}}',
            y: [{{#measurement}}{{elapsed}}{{^-last}}, {{/last}}{{/measurement}}],
        },
        {{/result}}
    ];
    var title = '{{title}}';

    data = data.map(a => Object.assign(a, { boxpoints: 'all', pointpos: 0, type: 'box' }));
    var layout = { title: { text: title }, showlegend: false, yaxis: { title: 'validation time', rangemode: 'tozero', autorange: true } };
    Plotly.newPlot('{{title}}', data, layout, {responsive: true});
</script>
++++
    )adoc";
    return boxplot_template;
}

// The static AsciiDoc epilogue appended after all benchmark results; it
// documents the competing implementations, the input files, and the
// benchmark methodology.
// Fix: "on an 2020 Mac Mini" -> "on a 2020 Mac Mini" (grammar in the
// generated report).
const char* output_suffix()
{
    return R"(
== The implementations

`baseline`::
    This simply adds all input characters of the JSON document without performing actual validation.
`lexy`::
    A JSON validator using the lexy grammar from the example.
    It uses the regular `lexy::buffer` as input, enabling SWAR and other optimizations.
    For maximum performance, this is the recommended input.
`lexy (no SWAR)`::
    Same as above, but it uses a special input where SWAR optimization has been manually disabled.
`lexy (no buffer)`::
    Same as above, but it uses `lexy::string_input` as the input.
    This is a stand-in for a generic non-buffer input where no input specific optimizations are possible.
`pegtl`::
    A JSON validator using the https://github.com/taocpp/PEGTL[PEGTL] JSON grammar.
`nlohmann/json`::
    A JSON validator using https://github.com/nlohmann/json[JSON for Modern C++] implemented by `nlohmann::json::accept()`.
`rapidjson`::
    A JSON validator using https://github.com/Tencent/rapidjson[rapidjson] implemented using a SAX parser with the `rapidjson::BaseReaderHandler`.
`Boost.JSON`::
    A JSON validator using https://github.com/boostorg/json[Boost.JSON] implemented using a custom parse handler.
`glaze`::
    A JSON validator using https://github.com/stephenberry/glaze[glaze].

== The inputs

`canada.json`::
    Contains lots of 2-element arrays holding floating-point coordinate pairs.
    Taken from https://github.com/miloyip/nativejson-benchmark.
`citm_catalog.json`::
    Big JSON file with some variety.
    Taken from https://github.com/miloyip/nativejson-benchmark.
`twitter.json`::
    Some data from twitter's API.
    Taken from https://github.com/miloyip/nativejson-benchmark.

== The Methodology

The input data is read using `lexy::read_file()`.
The resulting buffer is then passed to the various implementations using their memory inputs.
Benchmarking is done by https://nanobench.ankerl.com/[nanobench] on a 2020 Mac Mini with M1 processor.
    )";
}

int main(int argc, char* argv[])
{
    std::ofstream            out("benchmark_json.adoc");
    ankerl::nanobench::Bench b;

    auto perf = argc == 2 && argv[1] == std::string_view("perf");

    auto bench_data = [&](const char* file) {
        auto data = get_data(file);

        b.title(file).relative(true);
        b.unit("byte").batch(data.size());
        b.minEpochIterations(perf ? 100 : 10);

        if (perf)
            b.run("lexy", [&] { return json_lexy(data); });
        else
        {
            b.run("baseline", [&] { return json_baseline(data); });
            b.run("lexy", [&] { return json_lexy(data); });
            b.run("lexy (no SWAR)", [&] { return json_lexy_no_swar(data); });
            b.run("lexy (no buffer)", [&] { return json_lexy_no_buffer(data); });
            b.run("pegtl", [&] { return json_pegtl(data); });
            b.run("nlohmann/json", [&] { return json_nlohmann(data); });
            b.run("rapidjson", [&] { return json_rapid(data); });
#ifdef LEXY_HAS_BOOST_JSON
            b.run("Boost.JSON", [&] { return json_boost(data); });
#endif
            b.run("glaze", [&] { return json_glaze(data); });
            b.render(output_template(), out);
        }
    };

    out << output_prefix();
    bench_data("canada.json");
    bench_data("citm_catalog.json");
    bench_data("twitter.json");
    out << output_suffix();
}

